# Remove every object visible in the current environment so the script
# starts from an empty workspace.
# NOTE(review): when this file is source()d into an interactive session this
# also deletes the caller's objects — confirm that is intended.
rm(list = ls())
library(data.table)
library(plyr)
library(tidyr)
library(lfe)
library(stargazer)
library(xtable)
library(sandwich)
library(roll)
library(readxl)
library(readr)
library(zoo)
library(texreg)
library(DescTools)

# ---- Master data ----
# Read the master workbook (skipping its first row) into a data.table and
# derive the variables used by the regressions further down.
m <- data.table(
  read_xlsx("../Data/MasterData.xlsx", skip = 1, guess_max = 1e4)
)

# Age is measured relative to the hard-coded reference year 2021.
m[, age := 2021 - year]

# Multiply the return / rate columns by 100 in a single pass
# (presumably converting fractions to percent — TODO confirm raw units).
# Every right-hand side only reads its own column, so evaluating them
# together is equivalent to updating the columns one at a time.
m[, `:=`(
  # memorable-period returns
  memrableperiod_ownret = memrableperiod_ownret * 100,
  memrableperiod_pret = memrableperiod_pret * 100,
  memrableperiod_aret = memrableperiod_aret * 100,
  # recalled returns at the probed horizons
  retrecall_1day_rate = retrecall_1day_rate * 100,
  retrecall_30day_rate = retrecall_30day_rate * 100,
  retrecall_1year_rate = retrecall_1year_rate * 100,
  retrecall_5year_rate = retrecall_5year_rate * 100,
  # expected returns (market and self)
  expret30day_market_rate = expret30day_market_rate * 100,
  expret1year_market_rate = expret1year_market_rate * 100,
  expret30day_self_rate = expret30day_self_rate * 100,
  expret1year_self_rate = expret1year_self_rate * 100,
  # lagged one-year and intraday returns
  ret_lagy1 = ret_lagy1 * 100,
  ret_intraday2 = ret_intraday2 * 100,
  ret_intraday1 = ret_intraday1 * 100
)]

# Difference between the two (already rescaled) memorable-period returns.
# Kept as a separate step because it must see the rescaled values.
m[, bias := memrableperiod_pret - memrableperiod_aret]

# Parse the memorable-period boundaries (strings shaped like "%Ym%m") into
# zoo year-month values.
m[, `:=`(
  memrableperiod_begin = as.yearmon(memrableperiod_begin, "%Ym%m"),
  memrableperiod_end = as.yearmon(memrableperiod_end, "%Ym%m")
)]

# Distance from the hard-coded reference point 2022 + 2/12 back to the
# midpoint of the memorable period.
m[
  ,
  memrableperiod_dist := as.yearmon(2022 + 2 / 12) -
    (memrableperiod_begin + memrableperiod_end) / 2
]

# One-month lagged return built as the sum of the four weekly lags, times 100.
m[, ret_lagm1 := 100 * (ret_lagw1 + ret_lagw2 + ret_lagw3 + ret_lagw4)]

# ---- Account data ----
# Read the account workbook, coerce the four account-return columns to
# numeric, multiply them by 100, and left-join them onto the master data
# by `id`.
C <- data.table(
  read_xlsx("../Data/AccountData.xlsx", guess_max = 1e4)
)

# as.numeric() is applied before scaling; values that cannot be parsed
# become NA (with a coercion warning).
C[, `:=`(
  aret_1day_v2 = as.numeric(aret_1day_v2) * 100,
  aret_0day_v2 = as.numeric(aret_0day_v2) * 100,
  aret_4_1week_v2 = as.numeric(aret_4_1week_v2) * 100,
  aret_ytd_v2 = as.numeric(aret_ytd_v2) * 100
)]

# Keep every row of `m`; ids without account data get NA in the new columns.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
# NOTE(review): assumes `id` is unique in the account file — duplicated ids
# would duplicate rows of `m`; confirm against the data.
m <- merge(
  m,
  C[, .(id, aret_1day_v2, aret_0day_v2, aret_4_1week_v2, aret_ytd_v2)],
  by = "id",
  all.x = TRUE
)

# ---- Winsorize account returns ----
# Clamp each merged account-return column at its own 5th / 95th percentile,
# with the percentiles computed after dropping NAs.
# `na.rm = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable. Each right-hand side only reads its own column, so the four
# updates are independent of one another.
# NOTE(review): the bounds vector is passed positionally as the second
# argument of Winsorize(); this relies on the installed DescTools version
# accepting a length-2 bounds vector in that position — confirm the version.
m[, `:=`(
  aret_1day_v2 = Winsorize(
    aret_1day_v2,
    quantile(aret_1day_v2, probs = c(0.05, 0.95), na.rm = TRUE)
  ),
  aret_0day_v2 = Winsorize(
    aret_0day_v2,
    quantile(aret_0day_v2, probs = c(0.05, 0.95), na.rm = TRUE)
  ),
  aret_4_1week_v2 = Winsorize(
    aret_4_1week_v2,
    quantile(aret_4_1week_v2, probs = c(0.05, 0.95), na.rm = TRUE)
  ),
  aret_ytd_v2 = Winsorize(
    aret_ytd_v2,
    quantile(aret_ytd_v2, probs = c(0.05, 0.95), na.rm = TRUE)
  )
)]

# Dimensions of the subset where all three variables are non-missing
# (the sum is NA as soon as any term is NA).
dim(m[!is.na(aret_1day_v2 + ret_intraday2 + expret30day_self_rate)])

# Panel A, Free recall
#
# Common estimation sample: rows where both memorable-period returns and
# all four expectation variables are observed.
sample <- m[complete.cases(
  memrableperiod_aret, memrableperiod_pret,
  expret30day_market_rate, expret1year_market_rate,
  expret30day_self_rate, expret1year_self_rate
)]

# felm formulas have four parts: outcome ~ regressors | factors projected
# out | IV specification (0 = none) | cluster variable. The demographic and
# behaviour variables therefore enter as fixed effects, and standard errors
# are clustered by `date`.

# 30-day market expectation
f1 <- felm(
  expret30day_market_rate ~ memrableperiod_aret + memrableperiod_pret |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample
)

# 1-year market expectation
f2 <- felm(
  expret1year_market_rate ~ memrableperiod_aret + memrableperiod_pret |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample
)

# 30-day own-portfolio expectation
f3 <- felm(
  expret30day_self_rate ~ memrableperiod_aret + memrableperiod_pret |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample
)

# 1-year own-portfolio expectation
f4 <- felm(
  expret1year_self_rate ~ memrableperiod_aret + memrableperiod_pret |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample
)

# Emit the four-column regression table.
stargazer(
  f1, f2, f3, f4,
  align = TRUE,
  dep.var.labels.include = TRUE,
  covariate.labels = c("Actual episode return", "Recalled episode return"),
  omit.stat = c("LL", "ser", "F", "rsq"),
  ord.intercepts = FALSE,
  no.space = TRUE,
  single.row = FALSE,
  column.sep.width = "0pt",
  digits = 2
)

# Panel B, probed recall, full sample

# market expectations
# One sample per outcome: lagged market returns, the outcome itself, and
# both recalled-return variables must be observed.
sample1 <- m[complete.cases(
  ret_lagm1, ret_lagy1, expret30day_market_rate,
  retrecall_30day_rate, retrecall_1year_rate
)]

sample2 <- m[complete.cases(
  ret_lagm1, ret_lagy1, expret1year_market_rate,
  retrecall_30day_rate, retrecall_1year_rate
)]

# felm formula parts: outcome ~ regressors | factors projected out |
# IV specification (0 = none) | cluster variable (`date`).

# 30-day expectation on actual lagged market returns only
f1 <- felm(
  expret30day_market_rate ~ ret_lagm1 + ret_lagy1 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample1
)

# ... adding the recalled returns
f2 <- felm(
  expret30day_market_rate ~ ret_lagm1 + ret_lagy1 +
    retrecall_30day_rate + retrecall_1year_rate |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample1
)

# 1-year expectation on actual lagged market returns only
f3 <- felm(
  expret1year_market_rate ~ ret_lagm1 + ret_lagy1 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample2
)

# ... adding the recalled returns
f4 <- felm(
  expret1year_market_rate ~ ret_lagm1 + ret_lagy1 +
    retrecall_30day_rate + retrecall_1year_rate |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample2
)

# NOTE(review): the recalled-return rows are labelled "Recalled own return"
# although this panel uses market returns and market expectations — confirm
# the labels match what retrecall_* actually measures.
stargazer(
  f1, f2, f3, f4,
  align = TRUE,
  dep.var.labels.include = TRUE,
  covariate.labels = c("Actual market return, 1M", "Actual market return, 1Y", "Recalled own return, 1M", "Recalled own return, 1Y"),
  omit.stat = c("LL", "ser", "F", "rsq"),
  ord.intercepts = FALSE,
  no.space = TRUE,
  single.row = FALSE,
  column.sep.width = "0pt",
  digits = 2
)

# Panel C, probed recall, merged sample
# self expectations
#
# One sample per outcome: both actual account-return regressors, the
# outcome itself, and both recalled-return variables must be observed.
sample1 <- m[complete.cases(
  aret_4_1week_v2, aret_ytd_v2, expret30day_self_rate,
  retrecall_30day_rate, retrecall_1year_rate
)]

# Fix: the original filter tested `aret_ytd_v2` twice and never checked
# `aret_4_1week_v2`, even though both models fitted on `sample2` use it as
# a regressor. The filter now mirrors `sample1`.
sample2 <- m[complete.cases(
  aret_4_1week_v2, aret_ytd_v2, expret1year_self_rate,
  retrecall_30day_rate, retrecall_1year_rate
)]

# felm formula parts: outcome ~ regressors | factors projected out |
# IV specification (0 = none) | cluster variable (`date`).

# 30-day own expectation on actual account returns only
f1 <- felm(
  expret30day_self_rate ~ aret_4_1week_v2 + aret_ytd_v2 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample1
)

# ... adding the recalled returns
f2 <- felm(
  expret30day_self_rate ~ aret_4_1week_v2 + aret_ytd_v2 +
    retrecall_30day_rate + retrecall_1year_rate |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample1
)

# 1-year own expectation on actual account returns only
f3 <- felm(
  expret1year_self_rate ~ aret_4_1week_v2 + aret_ytd_v2 |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample2
)

# ... adding the recalled returns
f4 <- felm(
  expret1year_self_rate ~ aret_4_1week_v2 + aret_ytd_v2 +
    retrecall_30day_rate + retrecall_1year_rate |
    age + gender + education + total_wealth + total_income +
      accountcheck_freq + newscheck_freq + discussion_freq + num_wechat + type |
    0 | date,
  data = sample2
)

# Emit the four-column regression table.
stargazer(
  f1, f2, f3, f4,
  align = TRUE,
  dep.var.labels.include = TRUE,
  covariate.labels = c("Actual own return, 1M", "Actual own return, 1Y", "Recalled own return, 1M", "Recalled own return, 1Y"),
  omit.stat = c("LL", "ser", "F", "rsq"),
  ord.intercepts = FALSE,
  no.space = TRUE,
  single.row = FALSE,
  column.sep.width = "0pt",
  digits = 2
)
